.\"	$OpenBSD: systrace.4,v 1.21 2014/06/14 00:24:38 matthew Exp $
.\"
.\" Copyright (c) 2002, 2003 CubeSoft Communications, Inc.
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice, this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
.\" INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd $Mdocdate: June 14 2014 $
.Dt SYSTRACE 4
.Os
.Sh NAME
.Nm systrace
.Nd enforce and generate policies for system calls
.Sh SYNOPSIS
.Cd "pseudo-device systrace" Op Ar count
.Sh DESCRIPTION
.Nm
attaches to processes and enforces policies for system calls.
A pseudo-device,
.Pa /dev/systrace ,
allows userland processes to control the behavior of
.Nm
through an
.Xr ioctl 2
interface.
.Sh SYSTEM CALL POLICIES
.Nm
can assign the following policies to system calls:
.Bl -tag -width SYSTR_POLICY_XXXXXX
.It Dv SYSTR_POLICY_ASK
Send a message of the type
.Dv SYSTR_MSG_ASK ,
and put the process to sleep until a
.Dv STRIOCANSWER
.Xr ioctl 2
is made.
.It Dv SYSTR_POLICY_PERMIT
Immediately allow the system call.
.It Dv SYSTR_POLICY_NEVER
Immediately return an error code.
.It Dv SYSTR_POLICY_KILL
Send
.Dv SIGKILL
to the traced process.
.El
.Sh SYSTRACE MESSAGES
A
.Xr read 2
operation on the
.Nm
pseudo-device will block if there are no pending messages, or
return the following structure:
.Bd -literal
/*
 * Message returned by read(2) on the systrace pseudo-device.
 */
struct str_message {
	int msg_type;		/* one of the SYSTR_MSG_ values below */
#define SYSTR_MSG_ASK		1
#define SYSTR_MSG_RES		2
#define SYSTR_MSG_EMUL		3
#define SYSTR_MSG_CHILD		4
#define SYSTR_MSG_UGID		5
#define SYSTR_MSG_POLICYFREE	6
#define SYSTR_MSG_EXECVE	7
	pid_t msg_pid;
	u_int16_t msg_seqnr;	/* answer has to match seqnr */
	short msg_policy;	/* see SYSTR_MSG_POLICYFREE */
	/* NOTE(review): presumably msg_type selects the member - confirm */
	union {
		struct str_msg_emul msg_emul;
		struct str_msg_ugid msg_ugid;
		struct str_msg_ask msg_ask;
		struct str_msg_child msg_child;
		struct str_msg_execve msg_execve;
	} msg_data;
};

/* Emulation of a process, as reported by STRIOCREPORT. */
struct str_msg_emul {
	char emul[SYSTR_EMULEN];
};

/* Carried in msg_data.msg_ugid; presumably for SYSTR_MSG_UGID - confirm. */
struct str_msg_ugid {
	uid_t uid;
	gid_t gid;
};

/* Carried in msg_data.msg_execve; see SYSTR_MSG_EXECVE. */
struct str_msg_execve {
	char path[MAXPATHLEN];	/* new image name */
};

/* The system call that a SYSTR_MSG_ASK message asks about. */
struct str_msg_ask {
	int code;			/* system call number */
	int argsize;
	register_t args[SYSTR_MAXARGS];	/* system call arguments */
	register_t rval[2];
	int result;
};

/* Carried in msg_data.msg_child; see SYSTR_MSG_CHILD. */
struct str_msg_child {
	pid_t new_pid;		/* -1 if child exited */
};
.Ed
.Pp
These messages are all sent to the userland control process.
.Bl -tag -width SYSTR_MSG_XXXXXXXXXX
.It SYSTR_MSG_ASK
This message is sent whenever the kernel does not have a cached
simple policy for system call number
.Va code
within the currently set emulation.
.It SYSTR_MSG_RES
This message is sent whenever a system call is flagged with
SYSTR_FLAGS_RESULT.
.It SYSTR_MSG_EMUL
This message is sent whenever the emulation of a process changes.
.It SYSTR_MSG_CHILD
This message is sent whenever a process gains or loses a child.
In the latter case, the event is raised when the child exits, but
not when it is reaped.
.It SYSTR_MSG_UGID
This message is sent whenever the effective UID or GID has changed
during the execution of a system call.
.It SYSTR_MSG_POLICYFREE
This is sent whenever the kernel frees the policy identified by
.Va msg_policy .
.It SYSTR_MSG_EXECVE
This message is sent whenever, before a call to
.Xr execve 2 ,
a process is privileged (technically, the process has the PS_SUGID or
PS_SUGIDEXEC flag set),
but after the call these privileges have been dropped.
The new image name is specified in the
.Va path
argument.
.El
.Sh IOCTL INTERFACE
.Nm
supports the following
.Xr ioctl 2
commands:
.Bl -tag -width Ds
.It Dv STRIOCCLONE Fa "int *"
Return a
.Nm
file descriptor for
further
.Xr ioctl 2
operations.
The returned
.Nm
file descriptor is not inherited by a child created with
.Xr fork 2 .
Similarly, it cannot be passed across UNIX-domain sockets.
.It Dv STRIOCATTACH Fa "pid_t *"
Attach to a process, unless:
.Bl -enum -compact -width 2n
.It
It's the process that's doing the attaching.
.It
It's a system process.
.It
It's being traced already.
.It
You do not own the process and you're not root.
.It
It's
.Xr init 8 ,
and the
kernel was not compiled with
.Cd option INSECURE .
.El
.It Dv STRIOCDETACH Fa "pid_t *"
Wake up a process if it is waiting for an answer, and detach from it.
.It Dv STRIOCANSWER Fa "struct systrace_answer *"
Tell
.Nm
what to do with a system call that was assigned a policy of
.Dv SYSTR_POLICY_ASK .
.Bd -literal
/*
 * Argument to STRIOCANSWER: what to do with a system call that was
 * assigned a policy of SYSTR_POLICY_ASK.
 */
struct systrace_answer {
	pid_t stra_pid;	    /* PID of process being traced */
	u_int16_t stra_seqnr; /* msg_seqnr of the message being answered */
	short reserved;
	uid_t stra_seteuid; /* Elevated privileges for syscall */
	uid_t stra_setegid;
	int stra_policy;    /* Policy to assign */
	int stra_error;	    /* Return value of denied syscall
			       (will return EPERM if zero) */
	int stra_flags;
#define	SYSTR_FLAGS_RESULT  0x001    /* Report syscall result */
	/* NOTE(review): presumably apply stra_seteuid/stra_setegid - confirm */
#define SYSTR_FLAGS_SETEUID 0x002
#define SYSTR_FLAGS_SETEGID 0x004
};
.Ed
.It Dv STRIOCREPORT Fa "pid_t *"
Report the current emulation a process is using inside the
.Vt msg_emul
structure.
.It Dv STRIOCREPLACE Fa "struct systrace_replace *"
Arrange for system call arguments to be replaced by arguments
supplied by the monitoring process.
.Bd -literal
/*
 * Argument to STRIOCREPLACE: replacement system call arguments
 * supplied by the monitoring process.
 */
struct systrace_replace {
	pid_t strr_pid;
	u_int16_t strr_seqnr;
	int16_t reserved;
	int strr_nrepl;		/* # of arguments to replace */
	caddr_t	strr_base;		/* Base user memory */
	size_t strr_len;		/* Length of memory */
	int strr_argind[SYSTR_MAXARGS];	/* Argument indexes */
	size_t strr_off[SYSTR_MAXARGS];	/* Argument offsets */
	size_t strr_offlen[SYSTR_MAXARGS]; /* Argument sizes */
	int32_t strr_flags[SYSTR_MAXARGS];
};
.Ed
.It Dv STRIOCIO Fa "struct systrace_io *"
Copy data in/out of the process being traced.
.Bd -literal
/* Argument to STRIOCIO: one copy in or out of the traced process. */
struct systrace_io {
	pid_t strio_pid;    /* PID of process being traced */
	int strio_op;	    /* SYSTR_READ or SYSTR_WRITE */
#define	SYSTR_READ	1
#define	SYSTR_WRITE	2
	void *strio_offs;   /* address in the traced process */
	void *strio_addr;   /* buffer in the calling process */
	size_t strio_len;   /* number of bytes to copy */
};
.Ed
.It Dv STRIOCPOLICY Fa "struct systrace_policy *"
Manipulate the set of policies.
.Bd -literal
/* Argument to STRIOCPOLICY; strp_op selects the operation. */
struct systrace_policy {
	int strp_op;
#define	SYSTR_POLICY_NEW	1  /* Allocate a new policy */
#define	SYSTR_POLICY_ASSIGN	2  /* Assign policy to process */
#define	SYSTR_POLICY_MODIFY	3  /* Modify an entry */
	int strp_num;		   /* policy number */
	union {
		struct {
			short code;	/* index of the entry to modify */
#define SYSTR_POLICY_ASK	0	/* ask the control process */
#define SYSTR_POLICY_PERMIT	1	/* allow the system call */
#define SYSTR_POLICY_NEVER	2	/* return an error code */
#define SYSTR_POLICY_KILL	3	/* send SIGKILL */
			short policy;	/* new policy for that entry */
		} assign;
		pid_t pid;		/* process the policy is assigned to */
		int maxents;		/* number of entries in the policy */
	} strp_data;
#define strp_pid	strp_data.pid
#define strp_maxents	strp_data.maxents
#define strp_code	strp_data.assign.code
#define strp_policy	strp_data.assign.policy
};
.Ed
.Pp
The
.Dv SYSTR_POLICY_NEW
operation allocates a new policy of
.Va strp_maxents
entries with each initialized to
.Dv SYSTR_POLICY_ASK ,
and returns the new policy number into
.Va strp_num .
.Pp
The
.Dv SYSTR_POLICY_ASSIGN
operation attaches the policy identified by
.Va strp_num
to
.Va strp_pid ,
with a maximum of
.Va strp_maxents
entries.
.Pp
The
.Dv SYSTR_POLICY_MODIFY
operation changes the entry indexed by
.Va strp_code
to
.Va strp_policy .
.It Dv STRIOCGETCWD Fa "struct systrace_getcwd *"
Set the working directory of the calling process
to the directory associated with file descriptor
.Va strgd_atfd
in the process named by
.Va strgd_pid .
If
.Va strgd_atfd
is set to the special value
.Dv AT_FDCWD ,
then the current working directory of the named process
is used instead.
.Bd -literal
/* Argument to STRIOCGETCWD. */
struct systrace_getcwd {
	pid_t strgd_pid;	/* process whose directory is used */
	int   strgd_atfd;	/* descriptor in that process, or AT_FDCWD */
};
.Ed
.It Dv STRIOCRESCWD
Restore the working directory of the current process.
.It Dv STRIOCINJECT Fa "struct systrace_inject *"
Inject a buffer into the stackgap of the traced process.
This allows for the manipulation of non-scalar arguments.
The actual replacement is not done until system call time,
and its presence in the stackgap is only guaranteed
for the duration of that system call.
.Bd -literal
/* Argument to STRIOCINJECT. */
struct systrace_inject {
	/* On return, this contains the stackgap address. */
	caddr_t stri_addr;
	size_t  stri_len;	/* presumably the buffer length - confirm */
	pid_t   stri_pid;	/* presumably the traced process - confirm */
};
.Ed
.It Dv STRIOCSCRIPTNAME Fa "struct systrace_scriptname *"
Set the path of executed scripts to
.Va sn_scriptname .
.Bd -literal
/* Argument to STRIOCSCRIPTNAME. */
struct systrace_scriptname {
	pid_t sn_pid;
	char  sn_scriptname[MAXPATHLEN];  /* path of executed scripts */
};
.Ed
.El
.Sh FILES
.Bl -tag -width "/dev/systrace" -compact
.It Pa /dev/systrace
system call tracing facility
.El
.Sh EXAMPLES
The following is an example program that traces another process,
printing out the path argument of each
.Xr open 2
system call it performs.
.Bd -literal
#include <sys/param.h>
#include <sys/ioctl.h>
#include <dev/systrace.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Number of system calls that will be covered in our policy.
 */
#define NSYSCALLS 512

/*
 * Attach to a process, permit every system call except open(2), and
 * print the path argument of each open(2) call before permitting it.
 */
int
main(int argc, char *argv[])
{
	struct systrace_policy strpol;
	struct systrace_answer strans;
	struct systrace_io strio;
	struct str_message strmsg;
	pid_t pid;		/* STRIOCATTACH takes a pid_t pointer */
	int fd, cfd, i;
	ssize_t n;
	void *p;
	char c;

	if ((fd = open("/dev/systrace", O_RDONLY)) == -1)
		err(1, "/dev/systrace");

	/*
	 * Get a systrace descriptor.
	 */
	if (ioctl(fd, STRIOCCLONE, &cfd) == -1)
		err(1, "STRIOCCLONE");
	close(fd);

	/* Gather the PID of a process to systrace from somewhere. */
	/* ... */

	if (ioctl(cfd, STRIOCATTACH, &pid) == -1)
		err(1, "STRIOCATTACH");

	/* Install one policy. */
	memset(&strpol, 0, sizeof(strpol));
	strpol.strp_op = SYSTR_POLICY_NEW;
	strpol.strp_maxents = NSYSCALLS;

	if (ioctl(cfd, STRIOCPOLICY, &strpol) == -1)
		err(1, "STRIOCPOLICY NEW");

	/* strp_num now holds the new policy number; keep it. */
	strpol.strp_op = SYSTR_POLICY_ASSIGN;
	strpol.strp_pid = pid;

	if (ioctl(cfd, STRIOCPOLICY, &strpol) == -1)
		err(1, "STRIOCPOLICY ASSIGN");

	/* Permit all system calls. */
	for (i = 0; i < NSYSCALLS; i++) {
		strpol.strp_op = SYSTR_POLICY_MODIFY;
		strpol.strp_code = i;
		strpol.strp_policy = SYSTR_POLICY_PERMIT;

		if (ioctl(cfd, STRIOCPOLICY, &strpol) == -1)
			err(1, "STRIOCPOLICY MODIFY");
	}

	/* Ask us about open(2) system calls. */
	strpol.strp_op = SYSTR_POLICY_MODIFY;
	strpol.strp_code = 5; /* open(2) */
	strpol.strp_policy = SYSTR_POLICY_ASK;

	if (ioctl(cfd, STRIOCPOLICY, &strpol) == -1)
		err(1, "STRIOCPOLICY MODIFY");

	/*
	 * Now this process just answers requests for the operations the
	 * traced process performs that we have requested systrace to ask
	 * us about.
	 */
	while ((n = read(cfd, &strmsg, sizeof(strmsg))) ==
	    sizeof(strmsg)) {
		switch (strmsg.msg_type) {
		case SYSTR_MSG_ASK:
			/*
			 * Print out the path argument to open(2).
			 * The first argument is a pointer that is only
			 * meaningful in the traced process, so the string
			 * is fetched one byte at a time with STRIOCIO.
			 */
			memcpy(&p, &strmsg.msg_data.msg_ask.args,
			    sizeof(p));
			printf("open(");
			for (;;) {
				memset(&strio, 0, sizeof(strio));
				strio.strio_pid = strmsg.msg_pid;
				strio.strio_op = SYSTR_READ;
				strio.strio_offs = p;
				strio.strio_addr = &c;
				strio.strio_len = 1;

				if (ioctl(cfd, STRIOCIO, &strio) == -1)
					err(1, "STRIOCIO");
				/* Stop at the terminator without printing it. */
				if (c == '\e0')
					break;
				putchar(c);
				/* A cast is not an lvalue; step explicitly. */
				p = (char *)p + 1;
			}
			printf(")\en");

			memset(&strans, 0, sizeof(strans));
			strans.stra_pid = strmsg.msg_pid;
			strans.stra_seqnr = strmsg.msg_seqnr;
			strans.stra_policy = SYSTR_POLICY_PERMIT;

			if (ioctl(cfd, STRIOCANSWER, &strans) == -1)
				err(1, "STRIOCANSWER");
			break;
		default:
			/* Other message types need no answer here. */
			break;
		}
	}
	if (n == -1)
		err(1, "read");
	close(cfd);
	exit(0);
}
.Ed
.Sh SEE ALSO
.Xr systrace 1 ,
.Xr ioctl 2 ,
.Xr read 2 ,
.Xr options 4 ,
.Xr securelevel 7
.Sh HISTORY
The
.Nm
facility first appeared in
.Ox 3.2 .
.\" .Sh BUGS
.Sh CAVEATS
When creating new policies, if
.Va strp_maxents
is not large enough to cover every system call needed for
fundamental process operations, the traced process will block forever.
